#install.packages("haven")
#install.packages("missRanger")
#install.packages("dplyr")

library(haven)
library(missRanger)
library(dplyr)

# Load the Ketchley & Wenig CPS panel from its Stata (.dta) file.
# The path is relative, so the script must run from the folder holding the file.
ketchley_wenig_CPS <- read_dta(file = "ketchley_wenig_CPS.dta")

# Keep a single row per person: the first row of each person_id group.
# NOTE(review): "first" means first in the row order of the loaded file;
# presumably the panel is sorted so that this is each person's earliest
# observation -- confirm against the .dta file.
rows_by_person <- group_by(ketchley_wenig_CPS, person_id)
first_row_per_person <- slice(rows_by_person, 1)
# Drop the grouping so later steps operate on a plain (ungrouped) tibble.
subset_data <- ungroup(first_row_per_person)

# Columns passed to the random-forest imputation step. person_id is kept
# only as a row identifier for merging results back later.
rf_columns <- c(
  "person_id",
  "exits_elite",
  "threat_index2",
  "experience_index2",
  "threat_experience",
  "threat_index_wo_positions",
  "threat_pcf",
  "experience_pcf",
  "senior_official",
  "ln_royal_orders",
  "pasha_bey",
  "military_officer",
  "positions",
  "ln_years_in_office_since_1939",
  "terminal_degree",
  "ln_individuals_in_ministry",
  "ln_dist_to_power",
  "ln_mil_experience_in_ministry",
  "nasser",
  "sheikh",
  "muslim",
  "sqrt_clubs_membership",
  "age",
  "shared_surname_FO"
)

# Restrict the data to exactly these columns, in this order.
subset_data <- subset_data[rf_columns]

# Impute missing values with missRanger (chained random forests).
im_df <- missRanger(
  data = subset_data,
  # Left side ".": impute every column that has missing values.
  # Right side ". - person_id": use all columns except the identifier
  # as predictors.
  formula = . ~ . - person_id,
  # Predictive mean matching over the 3 nearest candidate values.
  pmm.k = 3,
  # Forwarded to the underlying random forest: trees grown per forest.
  num.trees = 100,
  # Fixed seed so the imputation is reproducible.
  seed = 23071952
)

# Keep the person identifier plus the two columns taken from the imputed
# data (sqrt_clubs_membership and age) so they can be merged back by
# person_id.
im_df <- subset(
  im_df,
  select = c("person_id", "sqrt_clubs_membership", "age")
)

# Write the result to a CSV file in the working directory, without a
# row-name column.
write.csv(x = im_df, file = "im_df.csv", row.names = FALSE)

